# import data
sites <- read_excel(here("water_quality", "data_raw", "Water quality data.xlsx"), sheet = "sites") %>%
clean_names()
ysi <- read_excel(here("water_quality", "data_raw", "Water quality data.xlsx"), sheet = "ysi") %>%
rename_with(tolower) %>% # prevents clean_names() from adding extra underscores before caps
clean_names() %>%
mutate(site_code = if_else(site_code == "Corington", "COV",
if_else(site_code == "greenhEB", "GEB",
site_code
))) %>%
rename(site_code_depth = site_code) %>%
filter(as.character(date) != "2023-10-05") # removing incomplete/aborted day
entero <- read_excel(here("water_quality", "data_raw", "Water quality data.xlsx"), sheet = "enterococcus") %>%
clean_names()
boats <- read_excel(here("water_quality", "data_raw", "Water quality data.xlsx"), sheet = "boat activity") %>%
clean_names() %>%
mutate(site_code = if_else(site_code == "GIA/LBI", "GIA", site_code)) # coding merged GIA/LBI sites as GIA for now
precip <- read_excel(here("water_quality", "data_raw", "Water quality data.xlsx"), sheet = "precipitation") %>%
clean_names() %>%
right_join(data.frame(date = seq(ymd('2023-10-10'), as.Date(today()), by='day'))) %>%
mutate(precipitation_in = replace_na(precipitation_in, 0))
wq_data <- ysi %>%
select(date, site_code_depth, temperature_c = temperature_c_24, ph, do_mg_l, do_percent_sat, conductivity_us_cm, sal_psu, phycoerythrin_rfu, chlorophyll_rfu) %>%
drop_na() %>%
left_join(ysi %>%
select(date, site_code_depth, turbidity_fnu) %>%
drop_na(),
by = c("date", "site_code_depth")) %>%
mutate(site_code = str_replace(site_code_depth, "[:digit:]", ""),
depth_m = parse_number(site_code_depth),
depth_m = replace_na(depth_m, 1)) %>% # surface-only sites are assumed to be approx 1m
left_join(sites, by = c("site_code")) %>%
left_join(entero, by = c("date", "site_code")) %>%
left_join(boats, by = c("date", "site_code")) %>%
mutate(n_colonies = replace_na(n_colonies, 0),
entero_yn = if_else(n_colonies == 0, "Absent",
if_else(n_colonies > 0, "Present", "NA")),
n_boats = replace_na(n_boats, 0),
month = month(ymd(date), label = TRUE),
year = year(ymd(date)),
date_label = paste(as.character(month), as.character(year)),
date_label = if_else(as.character(date) == "2024-04-29", "May 2024", date_label),
date_cat = if_else(substr(date_label, 1, 3) %in% c("Dec", "Jan", "Feb", "Mar", "Apr"), "High season", "Low season"),
site_cat = if_else(site_code %in% c("YIE", "GIE"), "Control", "Treatment")
) %>%
select(date, date_label, date_cat, site_code_depth, site_code, depth_m, site_name, site_cat, latitude, longitude, n_boats, temperature_c, ph, do_mg_l, do_percent_sat, conductivity_us_cm, sal_psu, phycoerythrin_rfu, chlorophyll_rfu, turbidity_fnu, n_colonies, entero_yn) %>%
distinct()
# streamlining data for graphs to include only regularly monitored sites and 1m depth
data_mon <- wq_data %>%
filter(site_code %in% c("NBRB", "NBRM", "EC", "NBA", "LBI", "GIA", "ML", "RBA", "GOE", "TPBN", "GIE", "YIE", "DB", "MRCH", "COV", "GEB", "FHB", "YIN", "BDB", "LDB", "MRYC") & depth_m == 1)
# GEB currently combined as Exchange Bay - Club Hotel, also omitting EBB/EBBW, YRS, RRS
### need to organize dates from north-south
# Target ranges
date_min <- min(data_mon$date) # earliest sampling date
date_max <- max(data_mon$date) # most recent sampling date
ref_ph = data.frame(xmin = -Inf, xmax = Inf, ymin = 7.7, ymax = 8.5) # 7.5 and 8.4 units (Rogers et al., 2001)
ref_sal = data.frame(xmin = -Inf, xmax = Inf, ymin = 32, ymax = 42) # 32,000 to 42,000 ppm (NOAA)
ref_turb = data.frame(xmin = -Inf, xmax = Inf, ymin = 0, ymax = 2) # <2 NTU (EPA)
ref_do = data.frame(xmin = -Inf, xmax = Inf, ymin = 6, ymax = Inf) # Must be greater than 6.0 ppm (EPMA, 2019)
ref_temp = data.frame(xmin = -Inf, xmax = Inf, ymin = 23, ymax = 29.6) # 23°–29°Celsius (Coral Reef Alliance)
ref_temp_bleaching = 30.63 # 30.63C bleaching threshold (NOAA)
ref_ent = data.frame(xmin = -Inf, xmax = Inf, ymin = 0, ymax = 7) # below 7/100mL (EPA)
ylims_ph <- c((min(data_mon$ph, na.rm = T) - 1), 9)
ylims_turb <- c((min(data_mon$turbidity_fnu, na.rm = T) - 1), (max(data_mon$turbidity_fnu, na.rm = T) + 1))
ylims_do <- c((min(data_mon$do_mg_l, na.rm = T) - 1), (max(data_mon$do_mg_l, na.rm = T) + 1))
ylims_temp <- c((min(data_mon$temperature_c, na.rm = T) - 1), (max(data_mon$temperature_c, na.rm = T) + 1))
ylims_phyco <- c((min(data_mon$phycoerythrin_rfu, na.rm = T) - 1), (max(data_mon$phycoerythrin_rfu, na.rm = T) + 1))
ylims_chlor <- c((min(data_mon$chlorophyll_rfu, na.rm = T) - 1), (max(data_mon$chlorophyll_rfu, na.rm = T) + 1))
ylims_entero <- c(0, (max(data_mon$n_colonies, na.rm = T) + 10))
c_range <- "gray20" # setting color for target range in graphs
a_range <- 0.2 # setting alpha (transparency) for target range in graphs
Writing formulas for standard graph types
# flipped point plots by site, with and without reference boxes
flpoint_site <- function(data_wq, y, ylab, ylims) {
ggplot() +
geom_point(data = data_wq,
aes(site_name, {{y}}, color = date_cat),
alpha = 0.6) +
labs(x = "", y = ylab, color = "Season") +
ylim(ylims) +
theme_bw() +
coord_flip()
}
flpoint_site_ref <- function(ref_data, data_wq, y, ylab, ylims) {
ggplot() +
geom_rect(data = ref_data,
aes(xmin = xmin, xmax = xmax, ymin = ymin, ymax = ymax, fill = "What"),
alpha = a_range) +
scale_fill_manual(values = c_range, guide = "none") +
geom_point(data = data_wq,
aes(site_name, {{y}}, color = date_cat),
alpha = 0.6) +
labs(x = "", y = ylab, color = "Season") +
ylim(ylims) +
theme_bw() +
coord_flip()
}
# line plots over time, with and without reference boxes
line_time <- function(data_wq, y, ylab, ylims) {
ggplot() +
geom_line(data = data_wq,
aes(x = date, y = {{y}}, group = site_code),
color = "lightblue3") +
geom_point(data = data_wq,
aes(x = date, y = {{y}}, group = site_code),
color = "slategray") +
scale_x_datetime(date_breaks = "1 month",
date_labels = "%b") +
labs(x = "", y = ylab) +
ylim(ylims) +
theme_bw()
}
line_time_ref <- function(data_ref, data_wq, y, ylab, ylims) {
ggplot() +
geom_rect(data = data_ref,
aes(xmin = as.POSIXct(-Inf), xmax = as.POSIXct(Inf) + 10000, ymin = ymin, ymax = ymax, fill = "What"),
alpha = a_range) +
scale_fill_manual(values = c_range, guide = "none") +
geom_line(data = data_wq,
aes(x = date, y = {{y}}, group = site_code),
color = "lightblue3") +
geom_point(data = data_wq,
aes(x = date, y = {{y}}, group = site_code),
color = "slategray") +
scale_x_datetime(date_breaks = "1 month",
date_labels = "%b") +
labs(x = "", y = ylab) +
ylim(ylims) +
theme_bw()
}
# for stack overflow
ref_do = data.frame(ymin = 6, ymax = Inf)
ggplot() +
geom_rect(data = ref_do,
aes(xmin = min(data_mon$date), xmax = max(data_mon$date), ymin = 6, ymax = Inf, fill = "What"),
alpha = 0.4) +
scale_fill_manual(values = "slategray", guide = "none") +
geom_line(data = data_mon,
aes(x = date, y = do_mg_l, group = site_code),
color = "lightblue3") +
geom_point(data = data_mon,
aes(x = date, y = do_mg_l, group = site_code),
color = "slategray") +
scale_x_datetime(date_breaks = "1 month",
date_labels = "%b") +
labs(x = "Sampling date", y = expression("Dissolved oxygen (mg "*L^-1*")")) +
theme_bw()
ggsave(here("water_quality", "figs", "2023-2024", "se1.png"))
ggplot() +
geom_rect(data = ref_do,
aes(xmin = as.POSIXct("2020-01-01"), xmax = as.POSIXct("2030-01-01"), ymin = ymin, ymax = ymax, fill = "What"),
alpha = 0.4) +
scale_fill_manual(values = "slategray", guide = "none") +
geom_line(data = data_mon,
aes(x = date, y = do_mg_l, group = site_code),
color = "lightblue3") +
geom_point(data = data_mon,
aes(x = date, y = do_mg_l, group = site_code),
color = "slategray") +
scale_x_datetime(date_breaks = "1 month",
date_labels = "%b") +
labs(x = "Sampling date", y = expression("Dissolved oxygen (mg "*L^-1*")")) +
theme_bw()
ggsave(here("water_quality", "figs", "2023-2024", "se2.png"))
ggplot() +
geom_rect(data = ref_do,
aes(xmin = as.POSIXct(-Inf), xmax = as.POSIXct(Inf), ymin = ymin, ymax = ymax, fill = "What"),
alpha = 0.4) +
scale_fill_manual(values = "slategray", guide = "none") +
geom_line(data = data_mon,
aes(x = date, y = do_mg_l, group = site_code),
color = "lightblue3") +
geom_point(data = data_mon,
aes(x = date, y = do_mg_l, group = site_code),
color = "slategray") +
scale_x_datetime(date_breaks = "1 month",
date_labels = "%b") +
labs(x = "Sampling date", y = expression("Dissolved oxygen (mg "*L^-1*")")) +
xlim(min(data_mon$date), max(data_mon$date)) +
theme_bw()
ggsave(here("water_quality", "figs", "2023-2024", "se3.png"))
To do:
flpoint_site_ref(ref_ph, data_mon, ph, "pH", ylims_ph)
line_time_ref(ref_ph, data_mon, ph, "pH", ylims_ph)
flpoint_site_ref(ref_turb, data_mon, turbidity_fnu, "Turbidity (FNU)", ylims_turb)
line_time_ref(ref_turb, data_mon, turbidity_fnu, "Turbidity (FNU)", ylims_turb)
flpoint_site_ref(ref_do, data_mon, do_mg_l, expression("Dissolved oxygen (mg "*L^-1*")"), ylims_do)
line_time_ref(ref_do, data_mon, do_mg_l, expression("Dissolved oxygen (mg "*L^-1*")"), ylims_do)
flpoint_site_ref(ref_ent, data_mon, n_colonies, expression("Enterococcus (colonies "*mL^-1*")"), ylims_entero)
line_time_ref(ref_ent, data_mon, n_colonies, expression("Enterococcus (colonies "*mL^-1*")"), ylims_entero)
flpoint_site(data_mon, chlorophyll_rfu, "Chlorophyll a (RFU)", ylims_chlor)
line_time(data_mon, chlorophyll_rfu, "Chlorophyll a (RFU)", ylims_chlor)
flpoint_site(data_mon, phycoerythrin_rfu, "Phycoerythrin (RFU)", ylims_phyco)
line_time(data_mon, phycoerythrin_rfu, "Phycoerythrin (RFU)", ylims_phyco)
# temperature
ggplot() +
# geom_rect(data = rect_dat_temp,
# aes(xmin = xmin, xmax = xmax, ymin = ymin, ymax = ymax, fill = "What"),
# alpha = 0.4) +
geom_hline(yintercept = ref_temp_bleaching, color = "salmon", linetype = "dashed") +
# scale_fill_manual(values = c_range, guide = "none") +
geom_line(data = data_mon,
aes(x = date, y = temperature_c, group = site_code)) +
scale_x_datetime(date_breaks = "1 month",
date_labels = "%b") +
labs(x = "", y = expression("Temperature ("*~degree*C*")")) +
theme_bw()
# dual y attempts
scale = .01 # relates y axis 1 with y axis 2
point_precip <- function(data, y, ylab, scale) {
ggplot() +
geom_point(data = data,
aes(x = date, y = {{y}}),
color = "salmon") +
geom_line(data = precip,
aes(x = date, y = precipitation_in/scale),
color = "black") +
scale_y_continuous(
name = ylab,
sec.axis = sec_axis(~.*scale, name = "Precipitation (in/day)")
) +
labs(x = "") +
theme_bw()
}
point_precip(data_mon, n_colonies, expression("Enterococcus (colonies "*mL^-1*")"), 0.01)
point_precip(data_mon, turbidity_fnu, "Turbidity (FNU)", .1)
Boat play
ggplot(data_mon %>% filter(site_code %in% c("GIA", "RBA", "NBA", "GOE")),
aes(x = n_boats, y = n_colonies)) +
geom_point(color = "slategray", alpha = 0.8) +
labs(x = "Number of boats in anchorage", y = expression("Enterococcus (colonies "*mL^-1*")"), 0.01) +
theme_bw()
Precipitation play
ggplot() +
geom_line(data = precip,
aes(x = date, y = precipitation_in)) +
labs(x = "", y = "Precipitation (inches)") +
theme_bw() +
theme(axis.text.x = element_text(angle = 45, hjust = 1))
# trying to calculate the amount of rainfall preceding a testing day
ysi_dates <- ysi %>%
select(date) %>%
distinct() # need to clarify which dates were actual monitoring days
precip_wk <- ysi %>%
select(end_date = date) %>%
distinct() %>%
mutate(start_date = end_date - days(7)) %>%
mutate(period = interval(ymd(start_date), ymd(end_date))) %>% # find a way to label this
fuzzy_left_join(precip, by = c("start_date" = "date", "end_date" = "date"),
match_fun = list(`<=`, `>=`)) %>%
group_by(period) %>%
summarize(precip_wkprior = sum(precipitation_in))
precip_48hr <- ysi %>%
select(end_date = date) %>%
distinct() %>%
mutate(start_date = end_date - days(2)) %>%
mutate(period = interval(ymd(start_date), ymd(end_date))) %>% # find a way to label this
fuzzy_left_join(precip, by = c("start_date" = "date", "end_date" = "date"),
match_fun = list(`<=`, `>=`)) %>%
group_by(period) %>%
summarize(precip_wkprior = sum(precipitation_in))
Map play
# Antigua shapefile
atg <- st_read(here("mapping", "shapefiles", "atg_adm_2019_shp", "atg_admbnda_adm1_2019.shp")) %>%
st_union() %>%
st_sf()
## Reading layer `atg_admbnda_adm1_2019' from data source
## `/Users/gizmo/Github/emc/mapping/shapefiles/atg_adm_2019_shp/atg_admbnda_adm1_2019.shp'
## using driver `ESRI Shapefile'
## Simple feature collection with 8 features and 4 fields
## Geometry type: MULTIPOLYGON
## Dimension: XY
## Bounding box: xmin: -62.34903 ymin: 16.93153 xmax: -61.65653 ymax: 17.72958
## Geodetic CRS: WGS 84
# set lat/lon for target range
lons = c(-61.72, -61.65)
lats = c(17.03, 17.10)
ggplot() +
geom_sf(data = atg, fill = "slategray", color = "slategray") + # ATG basemap
coord_sf(xlim = lons, ylim = lats, expand = FALSE) + # setting map limits
geom_point(data_mon,
mapping = aes(x = longitude, y = latitude, color = entero_yn, size = n_colonies),
alpha = 0.7) + # add sites
scale_color_manual(values = c("turquoise", "tomato")) +
facet_wrap(. ~ factor(date_label, levels = c("Oct 2023", "Nov 2023", "Dec 2023", "Jan 2024", "Feb 2024", "Mar 2024", "Apr 2024", "May 2024", "Jun 2024"))) +
labs(size = expression("Colonies "*mL^-1),
color = "Enterococcus detection") +
theme_bw() +
theme(axis.title = element_blank(),
axis.text = element_blank(),
axis.ticks = element_blank())
ggsave(here("water_quality", "figs", "2023-2024", "entero_map.png"), width = 10, height = 8)
Sargassum play
sites_sarg <- c("EBB", "EBBW", "YIE", "GIE")
sarg <- function(y, ylab) {
ggplot(wq_data %>% filter(site_code %in% sites_sarg),
aes(x = date, y = {{y}})) +
geom_point(aes(color = as.character(depth_m))) +
facet_grid(. ~ site_cat) +
theme_bw()
}
sarg(ph, "pH")
sarg(do_mg_l, "DO")
sarg(turbidity_fnu, "Turbidity (FNU)")